Let us set some global options for all code chunks in this document.

knitr::opts_chunk$set(
  message = FALSE,    # Disable messages printed by R code chunks
  warning = FALSE,    # Disable warnings printed by R code chunks
  echo = TRUE,        # Show R code within code chunks in output
  include = TRUE,     # Include both R code and its results in output
  eval = TRUE,       # Evaluate R code chunks
  cache = FALSE,       # Enable caching of R code chunks for faster rendering
  fig.align = "center",
  out.width = "100%",
  retina = 2,
  error = TRUE,
  collapse = TRUE
)
rm(list = ls())
set.seed(1982)

1 PREPROCESSING

Let us now load some required libraries.

# Load required libraries

# inla.upgrade(testing = TRUE)
# remotes::install_github("inlabru-org/inlabru", ref = "devel")
# remotes::install_github("davidbolin/rspde", ref = "devel")
# remotes::install_github("davidbolin/metricgraph", ref = "devel")
# remotes::install_github("davidbolin/ngme2", ref = "devel")

library(INLA)
#inla.setOption(num.threads = 7)
library(inlabru)
library(rSPDE)
library(MetricGraph)
library(ngme2)

library(plotly)
library(dplyr)

library(sf)

library(here)

Function standarize() below is later used to standardize the covariate SpeedLimit.

standardize <- function(x) {return((x - mean(x)) / sd(x))}

1.1 PREPARING AND ADDING THE DATA TO THE GRAPH.

We load the graph object sf_graph (which only contains weights) and the data (already graph-processed).

timeallprocedure <- Sys.time()
load(here("Graph_objects/graph_construction_19MAY24_FRC0134.RData"))
load(here("Data_files/data_day7142128_hour13_with_no_consecutive_zeros_19MAY24_FRC0134_graph_processed.RData"))
data_on_graph = data_on_graph %>% 
  dplyr::select(-datetime)

The following commands remove zero speed observations that are 1m away from the graph, and after that, they remove any speed observations that are 3m away from the graph.

to_remove = data_on_graph %>%
  filter(speed == 0, .distance_to_graph > 0.001) 

data_on_graph = setdiff(data_on_graph, to_remove) %>% 
  filter(.distance_to_graph <= 0.003)

We add data to the graph.

sf_graph$add_observations(data = data_on_graph, 
                          group = "day", 
                          normalized = TRUE, 
                          clear_obs = TRUE)

We get the values of the weights at data locations. This essentially gives us covariates from the weights.

sf_graph$edgeweight_to_data(data_loc = TRUE)

When running sf_graph$edgeweight_to_data(data_loc = TRUE), some NA values are created (because the data is grouped). We remove them below. We also standardize the SpeedLimit covariate.

data <- sf_graph$get_data() %>% 
  drop_na(-StreetName) %>% # this drops all rows with at least one NA value but without taking into account StreetName
  mutate(across(c("SpeedLimit"), ~standardize(.))) %>%
  dplyr::select(speed, SpeedLimit)

1.2 NOW WE ADD THE FINAL VERSION OF THE DATA TO THE GRAPH

We add the data again but now with the new standardized SpeedLimit covariate.

sf_graph$add_observations(data = data, 
                          group = "day", 
                          normalized = TRUE, 
                          clear_obs = TRUE)

1.3 GET THE OBSERVATION LOCATIONS (FROM THE (FINAL VERSION OF THE) DATA EXTRACTED FROM THE GRAPH)

data_final <- sf_graph$get_data()
obs.loc <- data_final |> as.data.frame() |> dplyr::select(.edge_number, .distance_on_edge)

1.4 BUILD THE MESH

h = 0.05
sf_graph$build_mesh(h = h)

1.5 GET THE VALUES OF THE COVARIATE ON THE MESH LOCATIONS

We get the value of the weights at mesh locations. This will allow us to built matrices B.sigma and B.range below. Again, sf_graph$edgeweight_to_data(mesh = TRUE, add = FALSE, return = TRUE) creates repeated information (because the data is grouped). We fix that by filtering one group. We also standardize the SpeedLimit covariate.

mesh <- sf_graph$edgeweight_to_data(mesh = TRUE, 
                                   add = FALSE, 
                                   return = TRUE) %>% 
  filter(.group == 1) %>%
  mutate(across(c("SpeedLimit"), ~standardize(.))) %>%
  dplyr:::select.data.frame(SpeedLimit)
res <- lm(speed ~ SpeedLimit,
         data = data_final)

summary(res)
## 
## Call:
## lm(formula = speed ~ SpeedLimit, data = data_final)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -63.584 -11.930   0.944  10.600  84.289 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 25.98017    0.09999  259.84   <2e-16 ***
## SpeedLimit   9.18346    0.09999   91.85   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 16.07 on 25840 degrees of freedom
## Multiple R-squared:  0.2461, Adjusted R-squared:  0.2461 
## F-statistic:  8436 on 1 and 25840 DF,  p-value: < 2.2e-16

yhat <- res$fitted.values
plot(yhat)

plot(res)

plot(data_final$SpeedLimit, data_final$speed)
abline(res, col = "red", lwd = 2)

residuals <- res$residuals
residuals2 <- data_final$speed - yhat
data_final <- data_final %>%
  mutate(residuals = residuals2,
         yhat = yhat)
ggplot() +
  geom_point(data = data_final, aes(x = .coord_x, y = .coord_y, color = speed)) +
  scale_color_viridis_c()

ggplot() +
  geom_point(data = data_final, aes(x = .coord_x, y = .coord_y, color = SpeedLimit)) +
  scale_color_viridis_c()

ggplot() +
  geom_point(data = data_final, aes(x = .coord_x, y = .coord_y, color = yhat)) +
  scale_color_viridis_c()

ggplot() +
  geom_point(data = data_final, aes(x = .coord_x, y = .coord_y, color = residuals)) +
  scale_color_viridis_c()

save.image(here(paste0("Models_output/", rmarkdown::metadata$title, ".RData")))